library(tidyverse)
library(lubridate) # work with dates
library(magrittr) # more pipes
library(broom) # tidy regression outputs
library(fixest) # fixest::feols() is very fast with big data and fixed effects
library(patchwork) # for combining plots
library(ggthemes) # for ggthemes
library(knitr) # for making regression tables in Latex
shift <- data.table::shift # for lagging/leading variables


#### Load in and tidy data ####

## MP data (one of the inputs to every analysis frame built below)
mp_df <- readRDS(file = "data/analysis/mp_df.rds")

## Load in climate terms (used as the str_count() pattern on speech text)
climterms <- readRDS(file = "data/output/climgenterms.rds")

## Load in petition speech data, one CSV per debate
restoring_raw <- readr::read_csv(
  file = "data/analysis/petition_20191028_restoringnature.csv"
)
deforestation_raw <- readr::read_csv(
  file = "data/analysis/petition_20191007_deforestation.csv"
)

## Filter petition speeches for climate terms

# 2019-10-07 debate
# Collapse the speech-level data to one row per speaker (`about`), holding the
# number of speeches made and the total number of `climterms` matches across
# those (lower-cased) speeches.
# NOTE(review): `climterms` is handed to str_count() as the pattern unchanged.
# This assumes it is a single regex string -- confirm; a longer character
# vector would be recycled against the speeches, not matched as alternatives.
deforestation_df <- deforestation_raw %>% 
  mutate(climate_flag = str_count(tolower(speech_text_20191007), climterms)) %>% 
  group_by(about) %>% 
  # One row per speaker; mp_name is taken from the speaker's first row.
  # Rows come out sorted by `about` and the grouping is dropped, so the
  # result is a plain (ungrouped) tibble.
  summarise(mp_name = first(mp_name),
            sum_climate_20191007 = sum(climate_flag),
            sum_speeches_20191007 = n(),
            .groups = "drop") %>% 
  # Numeric id; presumably to match the type of `about` in mp_df for the
  # later join -- verify against mp_df
  mutate(about = as.numeric(about))

# 2019-10-28 debate
# Collapse the speech-level data to one row per speaker (`about`), holding the
# number of speeches made and the total number of `climterms` matches across
# those (lower-cased) speeches.
# NOTE(review): `climterms` is handed to str_count() as the pattern unchanged.
# This assumes it is a single regex string -- confirm; a longer character
# vector would be recycled against the speeches, not matched as alternatives.
restoring_df <- restoring_raw %>% 
  mutate(climate_flag = str_count(tolower(speech_text_20191028), climterms)) %>% 
  group_by(about) %>% 
  # One row per speaker; mp_name is taken from the speaker's first row.
  # Rows come out sorted by `about` and the grouping is dropped, so the
  # result is a plain (ungrouped) tibble.
  summarise(mp_name = first(mp_name),
            sum_climate_20191028 = sum(climate_flag),
            sum_speeches_20191028 = n(),
            .groups = "drop") %>% 
  # Numeric id; presumably to match the type of `about` in mp_df for the
  # later join -- verify against mp_df
  mutate(about = as.numeric(about))


## Tidy MP data to get cumulative protests and covariates

# Deforestation debate
# Builds one row per MP for the 2019-10-07 debate: running totals of protests,
# tweets and speeches up to that day, lagged weekly/monthly protest counts,
# and the per-speaker petition-debate counts from deforestation_df.
deforestation_analysis <- mp_df %>% 
  # cumsum() and the row-offset lags below depend on row order, so make the
  # within-MP chronological order explicit instead of relying on file order
  arrange(full_name, date) %>% 
  mutate(
    # Truncate FFF count for cumulative sum
    fff_cutoff_20191007 = ifelse(date <= "2019-10-07", sum_fff_events, 0),
    # Fold "Labour (Co-op)" into "Labour"
    party_value = ifelse(party_value == "Labour (Co-op)", "Labour", party_value)
  ) %>% 
  # Cumulatively sum local protests and political speech
  group_by(full_name) %>% 
  mutate(cumulative_fff_protests_all_time = cumsum(sum_fff_events),
         cumulative_fff_protests_20191007 = cumsum(fff_cutoff_20191007),
         cumulative_ctweets = cumsum(sum_ctweets),
         cumulative_cspchs = cumsum(sum_cspchs),
         cumulative_tweets = cumsum(sum_tweets),
         cumulative_spchs = cumsum(sum_spchs)) %>% 
  # Sum local protests within month
  group_by(full_name, year_month) %>% 
  mutate(sum_fff_year_month = sum(sum_fff_events)) %>% 
  # Sum local protests within week
  group_by(full_name, year_week) %>% 
  mutate(sum_fff_year_week = sum(sum_fff_events)) %>% 
  # Lag protests by month, week
  # NOTE(review): the lags are 7 and 30 *rows* back, which equals days only
  # if mp_df has exactly one row per MP per calendar day -- TODO confirm
  group_by(full_name) %>% 
  mutate(sum_fff_lastweek = shift(sum_fff_year_week, type = "lag", n = 7),
         sum_fff_lastmonth = shift(sum_fff_year_month, type = "lag", n = 30)) %>% 
  ungroup() %>% 
  # Keep only the debate day
  filter(date == "2019-10-07") %>% 
  # Binary variable for speaking on petition day 
  mutate(speak_20191007 = as.numeric(as.logical(sum_spchs))) %>% 
  # Merge petition debate data with MP data. full_join() also keeps debate
  # speakers with no mp_df row on this date; those rows have NA covariates.
  full_join(deforestation_df, by = "about") %>% 
  # MPs absent from the debate data get zero counts
  replace_na(list(sum_speeches_20191007 = 0,
                  sum_climate_20191007 = 0))

# Restoring nature debate
# Builds one row per MP for the 2019-10-28 debate: running totals of protests,
# tweets and speeches up to that day, lagged weekly/monthly protest counts,
# and the per-speaker petition-debate counts from restoring_df.
restoring_analysis <- mp_df %>% 
  # cumsum() and the row-offset lags below depend on row order, so make the
  # within-MP chronological order explicit instead of relying on file order
  arrange(full_name, date) %>% 
  mutate(
    # Truncate FFF count for cumulative sum
    fff_cutoff_20191028 = ifelse(date <= "2019-10-28", sum_fff_events, 0),
    # Fold "Labour (Co-op)" into "Labour"
    party_value = ifelse(party_value == "Labour (Co-op)", "Labour", party_value)
  ) %>% 
  # Cumulatively sum local protests and political speech
  group_by(full_name) %>% 
  mutate(cumulative_fff_protests_all_time = cumsum(sum_fff_events),
         cumulative_fff_protests_20191028 = cumsum(fff_cutoff_20191028),
         cumulative_ctweets = cumsum(sum_ctweets),
         cumulative_cspchs = cumsum(sum_cspchs),
         cumulative_tweets = cumsum(sum_tweets),
         cumulative_spchs = cumsum(sum_spchs)) %>% 
  # Sum local protests within month
  group_by(full_name, year_month) %>% 
  mutate(sum_fff_year_month = sum(sum_fff_events)) %>% 
  # Sum local protests within week
  group_by(full_name, year_week) %>% 
  mutate(sum_fff_year_week = sum(sum_fff_events)) %>% 
  # Lag protests by month, week
  # NOTE(review): the lags are 7 and 30 *rows* back, which equals days only
  # if mp_df has exactly one row per MP per calendar day -- TODO confirm
  group_by(full_name) %>% 
  mutate(sum_fff_lastweek = shift(sum_fff_year_week, type = "lag", n = 7),
         sum_fff_lastmonth = shift(sum_fff_year_month, type = "lag", n = 30)) %>% 
  ungroup() %>% 
  # Keep only the debate day
  filter(date == "2019-10-28") %>% 
  # Binary variable for speaking on petition day 
  mutate(speak_20191028 = as.numeric(as.logical(sum_spchs))) %>% 
  # Merge petition debate data with MP data. full_join() also keeps debate
  # speakers with no mp_df row on this date; those rows have NA covariates.
  full_join(restoring_df, by = "about") %>% 
  # MPs absent from the debate data get zero counts
  replace_na(list(sum_speeches_20191028 = 0,
                  sum_climate_20191028 = 0))

#### Regression models ####

# Deforestation debate, FFF in September
# OLS with a binary outcome: did the MP make any climate-term speech in the
# 2019-10-07 debate, regressed on the lagged-month protest indicator, the
# cumulative protest count, their interaction, and controls.
lm_deforestation <- deforestation_analysis %>% 
  mutate(
    # Outcome: any climate-term match in the debate (count -> TRUE/FALSE)
    sum_climate_20191007 = as.logical(sum_climate_20191007),
    # Treatment: any protest in the lagged month (count -> TRUE/FALSE)
    sum_fff_lastmonth = as.logical(sum_fff_lastmonth),
    # Control: log of one plus total speeches to date
    cumulative_spchs = log(1+cumulative_spchs)
  ) %>% 
  lm(sum_climate_20191007 ~ 
       sum_fff_lastmonth + 
       cumulative_fff_protests_20191007 +
       sum_fff_lastmonth:cumulative_fff_protests_20191007 +
       cumulative_cspchs + 
       party_value +
       frontbench +
       speak_20191007 +
       cumulative_spchs,
     data = .) 


# Restoring nature debate, FFF in September
# OLS with a binary outcome: did the MP make any climate-term speech in the
# 2019-10-28 debate, regressed on the lagged-month protest indicator, the
# cumulative protest count, their interaction, and controls.
lm_restoring <- restoring_analysis %>% 
  mutate(
    # Outcome: any climate-term match in the debate (count -> TRUE/FALSE)
    sum_climate_20191028 = as.logical(sum_climate_20191028),
    # Treatment: any protest in the lagged month (count -> TRUE/FALSE)
    sum_fff_lastmonth = as.logical(sum_fff_lastmonth),
    # Control: log of one plus total speeches to date
    cumulative_spchs = log(1+cumulative_spchs)
  ) %>% 
  lm(sum_climate_20191028 ~ 
       sum_fff_lastmonth + 
       cumulative_fff_protests_20191028 +
       sum_fff_lastmonth:cumulative_fff_protests_20191028 +
       cumulative_cspchs + 
       party_value +
       frontbench +
       speak_20191028 +
       cumulative_spchs,
     data = .) 



## Combine HTE speech models into table
# Stacks the tidy() coefficient tables of both debate models, formats each
# coefficient as "estimate + stars" with its standard error in parentheses on
# the row below, keeps only the three FFF terms, appends descriptive rows, and
# renders the result as a LaTeX table.
table_targeted_speeches <- bind_rows(
  lm_deforestation %>% tidy() %>% mutate(model = 1),
  lm_restoring %>% tidy() %>% mutate(model = 2)
) %>% 
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>%
  # Round to 3 d.p. trimws() removes the padding format() adds when aligning a
  # whole column, which otherwise leaks into cells such as "( 0.012)".
  mutate(across(estimate:std_error,
                ~trimws(format(round(., digits = 3), nsmall = 3)))) %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(cols = estimate:p_value,
               names_to = "param", values_to = "value") %>%
  mutate(value = ifelse(param == "std_error",
                        paste0("(", value, ")"), value)) %>%
  group_by(model, term) %>% 
  # Read the p-value from its own row of the group. All values are character
  # here, so taking max() over the column would compare strings, not numbers.
  mutate(p_value = as.numeric(value[param == "p_value"]),
         # * p < 0.05, ** p < 0.01; a missing p-value gets no stars
         stars = case_when(p_value < 0.01 ~ "**",
                           p_value < 0.05 ~ "*",
                           TRUE ~ ""),
         value = ifelse(param == "estimate", paste0(value, stars), value)) %>% 
  ungroup() %>% 
  select(-p_value, -stars) %>% 
  filter(param != "t_statistic",
         param != "p_value") %>%
  pivot_wider(names_from = "model", values_from = "value",
              names_prefix = "P") %>%
  # Shared display labels; the debate-specific terms of the two models map to
  # the same label. Unlisted terms (the party dummies) become NA.
  mutate(term = case_when(term == "sum_fff_lastmonthTRUE" ~ "FFF (September)",
                          term == "cumulative_fff_protests_20191007" ~ "FFF (cumulative)",
                          term == "cumulative_fff_protests_20191028" ~ "FFF (cumulative)",
                          term == "sum_fff_lastmonthTRUE:cumulative_fff_protests_20191007" ~ "FFF (interaction)",
                          term == "sum_fff_lastmonthTRUE:cumulative_fff_protests_20191028" ~ "FFF (interaction)",
                          term == "cumulative_cspchs" ~ "Climate speeches (cumulative)",
                          term == "cumulative_spchs" ~ "Commons speeches (cumulative, log)",
                          term == "speak_20191028" ~ "Commons speech day of debate",
                          term == "speak_20191007" ~ "Commons speech day of debate",
                          term == "frontbench" ~ "Frontbench",
                          term == "(Intercept)" ~ "Intercept")) %>%
  # Long -> drop NAs -> wide: removes the unlabelled terms and merges the two
  # half-empty rows that now share a label into one row with both P1 and P2
  pivot_longer(cols = P1:P2, names_to = "model", values_to = "estimate") %>% 
  drop_na() %>% 
  pivot_wider(names_from = "model", values_from = "estimate") %>% 
  mutate(order = case_when(term == "FFF (September)" ~ 1,
                           term == "FFF (cumulative)" ~ 2,
                           term == "FFF (interaction)" ~ 3,
                           term == "Climate speeches (cumulative)" ~ 4,
                           term == "Commons speeches (cumulative, log)" ~ 5,
                           term == "Frontbench" ~ 6,
                           term == "Commons speech day of debate" ~ 6.1,
                           term == "Intercept" ~ 7)) %>% 
  # "estimate" sorts before "std_error", so each SE row follows its estimate
  arrange(order, param) %>% 
  mutate(across(everything(), ~ifelse(is.na(.), "", .))) %>%
  # Blank the label on standard-error rows
  mutate(term = ifelse(param == "std_error", "", term)) %>% 
  # Drop controls (only the three FFF terms are displayed)
  filter(order <= 3) %>%
  add_row(term = "Debate",  P1 = "Deforestation", P2 = "Restore nature") %>% 
  add_row(term = "Outcome",  P1 = "Speeches", P2 = "Speeches") %>% 
  add_row(term = "Covariates", P1 = "Yes", P2 = "Yes") %>% 
  add_row(term = "R-squared", param = "rsq",
          P1 = as.character(round(summary(lm_deforestation)$r.squared, 3)),
          P2 = as.character(round(summary(lm_restoring)$r.squared, 3))) %>%
  add_row(term = "Observations", param = "nobs",
          P1 = as.character(nobs(lm_deforestation)),
          P2 = as.character(nobs(lm_restoring))) %>%
  select(term, M11 = P1, M12 = P2) %>% 
  # {. ->> int_hte_speeches } %>%
  # filter(term!="") %>% # Remove the standard error term
  knitr::kable(format = "latex",
               booktabs = TRUE,
               linesep = "")
table_targeted_speeches

# Goodness-of-fit map handed to modelsummary() via `gof_map`: `raw` is the
# statistic's name, `clean` its display label, `fmt` its format setting.
# ("r.squared" -> "R2" with fmt 2 is intentionally left out.)
gm <- tibble::tibble(
  raw = c("nobs",
          "FE: full_name",
          "FE: year_month",
          "FE: year_week"),
  clean = c("Observations",
            "MP fixed effect",
            "Year-month fixed effect",
            "Year-week fixed effect"),
  fmt = c(0, 0, 0, 0)
)

# Table APP-12
# Full coefficient table (all covariates) for both petition-debate models,
# rendered as LaTeX. modelsummary is not attached by this script's library()
# calls, so the function is called with its namespace.
full_petition_latex <- modelsummary::modelsummary(
  models = list(lm_deforestation, lm_restoring), 
  output = "latex",
  # Display labels for the model terms
  coef_rename = c(
    "sum_fff_lastmonthTRUE" = "FFF (September)",
    "cumulative_fff_protests_20191007" = "FFF (cumulative, 2019-10-07)",
    "sum_fff_lastmonthTRUE:cumulative_fff_protests_20191007" = "FFF (Sept. × cumulative, 2019-10-07)",
    "cumulative_fff_protests_20191028" = "FFF (cumulative, 2019-10-28)",
    "sum_fff_lastmonthTRUE:cumulative_fff_protests_20191028" = "FFF (Sept. × cumulative, 2019-10-28)",
    "cumulative_cspchs" = "Climate speeches (cumulative)",
    "cumulative_spchs" = "Cumulative speeches",
    "frontbench" = "Frontbench",
    "party_valueDemocratic Unionist Party" = "Democratic Unionist Party",
    "party_valueGreen Party" = "Green Party",
    "party_valueLabour" = "Labour",
    "party_valueLiberal Democrat" = "Liberal Democrat",
    "party_valuePlaid Cymru" = "Plaid Cymru",
    "party_valueScottish National Party" = "Scottish National Party",
    "speak_20191007" = "Speak (2019-10-07)",
    "speak_20191028" = "Speak (2019-10-28)"),
  gof_map = gm, 
  title = 'MP parliamentary speeches in petition window',
  # Same thresholds as the hand-built table: * p < 0.05, ** p < 0.01
  stars = c("*" = 0.05, "**" = 0.01))
full_petition_latex
